* This do-file compiles the heterogeneity dataset for Section 5.4

* Session setup.
* Turn off the -more- pause; capture keeps the script going if the setting is rejected.
capture set more off

* Global macros used throughout: scratch folder, raw country-level inputs, reference year.
global tmp "/tmp"
global dta "/Users/fcoell/Dropbox/PATSTAT/DATA/Ctry_heterog_data/"
global first "1986"

* Every relative path below is resolved from this project folder.
cd "/Users/fcoell/Dropbox/PATSTAT/"

* -----------------------------------------------------------------------------
* Preliminary: Get list of countries
* -----------------------------------------------------------------------------
* Step 1: clean the PATSTAT country table (tls801) and store it as a lookup.
use ../PATSTAT_spring2015/tls801/tls801, clear
replace iso_alpha3 = strtrim(iso_alpha3)
* Keep only rows that have an ISO alpha-3 code, and exclude the code DL:
* DDR has 2 country codes in Patstat (DD and DL); only DD is in our dataset.
keep if !missing(iso_alpha3) & ctry_code != "DL"
save $tmp/tmp801, replace

* Step 2: one row per headquarter code present in the WTO-1995 regression sample.
* (An earlier version read DATA/reg_variables5Granted instead.)
use DATA/reg_variables6Granted_WTO, clear
keep if wto1995 == 1
contract headq
drop _freq

* Step 3: keep the headquarter codes that exist in the lookup. The key is renamed
* temporarily because the lookup calls it ctry_code.
rename headq ctry_code
merge 1:1 ctry_code using $tmp/tmp801, keep(match)
* Stop if any matched code is not flagged "Y" in state_indicator
* (presumably the flag for sovereign states; confirm against the tls801 documentation).
assert state_indicator == "Y"
rename ctry_code headq

keep headq iso_alpha3 continent st3_name
save $tmp/ctry_list, replace


* -----------------------------------------------------------------------------
* Group countries by World Bank income group
* -----------------------------------------------------------------------------
import excel $dta/OGHIST.xls, sheet("Income_groups") firstrow clear
keep iso_alpha3 country C

* Column C corresponds to 1987 (first available year).
replace C = strtrim(C)
label variable C "World Bank income group in 1987"
tabulate C

* Rows with no usable classification are removed: blank cells and the ".." marker.
drop if inlist(C, "", "..")
rename C income_group_WB

* Attach the headquarter code. Countries from our list that have no income group
* are printed for inspection and then dropped.
merge 1:1 iso_alpha3 using $tmp/ctry_list, keep(match using)
list st3_name if _merge == 2
drop if _merge == 2

keep headq income_group_WB*
order headq
save $tmp/cat_WB_income_group, replace


* -----------------------------------------------------------------------------
* Group countries by initial GDP
* -----------------------------------------------------------------------------
* The "Combine together" step merges two files from this section, cat_gdp_pcap
* and cat_gdp_pcap_ppp, and the final compilation tabulates both gdp_pcap_quart
* and gdp_pcap_ppp_quart. Both variants are therefore built here in one pass
* instead of relying on a manual re-run with a different value of the ppp global.
* NOTE(review): this assumes gdp_pcap.csv sits next to gdp_pcap_ppp.csv in the
* raw data folder and has the same column layout; confirm.
* The PPP variant runs last, so the ppp global ends up as "_ppp" as before.
foreach use_ppp in 0 1 {
    if `use_ppp' {
        global ppp "_ppp"
    }
    else {
        global ppp ""
    }

    import delimited $dta/gdp_pcap${ppp}.csv, varnames(1) encoding(utf8) clear
    rename (iso3c date value) (iso_alpha3 year gdp_pcap${ppp})
    keep iso_alpha3 year gdp_pcap${ppp} country

    * Use the earliest year available in the file as the initial year.
    summarize year
    local min = r(min)
    keep if year == `min'

    * Attach the headquarter code; print list countries with no GDP figure, then drop them.
    merge 1:1 iso_alpha3 using $tmp/ctry_list, keep(match using)
    tab _merge
    list st3_name if _merge==2
    keep if _merge==3

    * Quartile of each country and the average GDP per capita within its quartile.
    xtile gdp_pcap${ppp}_quart = gdp_pcap${ppp}, nq(4)
    egen mean_gdp_pcap${ppp} = mean(gdp_pcap${ppp}), by(gdp_pcap${ppp}_quart)
    bys gdp_pcap${ppp}_quart: tab country

    * Labels report the year actually kept (the minimum year found above), which
    * need not equal the reference year stored in the first global.
    if `use_ppp' {
        label var mean_gdp_pcap${ppp} "Avg. GDP per capita (PPP) in country's quartile in `min'"
        label var gdp_pcap${ppp} "GDP per capita, PPP (constant 2011 international $) in `min'"
    }
    else {
        label var mean_gdp_pcap${ppp} "Avg. GDP per capita in country's quartile in `min'"
        label var gdp_pcap${ppp} "GDP per capita (constant 2010 US$) in `min'"
    }

    keep gdp_pcap${ppp}* headq mean_gdp_pcap${ppp}
    order headq
    save $tmp/cat_gdp_pcap${ppp}, replace
}


* -----------------------------------------------------------------------------
* Group countries by initial trade (% of GDP)
* -----------------------------------------------------------------------------
* Load the trade series and restrict it to the reference year.
import delimited $dta/trade.csv, varnames(1) encoding(utf8) clear
rename (iso3c date value) (iso_alpha3 year trade_gdp_share)
keep if year == ${first}
keep iso_alpha3 year trade_gdp_share country

* Attach the headquarter code; print list countries with no trade figure, then drop them.
merge 1:1 iso_alpha3 using $tmp/ctry_list, keep(match using)
tabulate _merge
list st3_name if _merge == 2
drop if _merge == 2

* Quartile of each country and the average trade share within its quartile.
label variable trade_gdp_share "Trade (% of GDP) in $first"
xtile trade_quart = trade_gdp_share, nq(4)
egen mean_trade_gdp_share = mean(trade_gdp_share), by(trade_quart)
label variable mean_trade_gdp_share "Avg. trade (% of GDP) in country's quartile in $first"
bysort trade_quart: tabulate country

keep headq trade_gdp_share mean_trade_gdp_share trade_quart*
order headq
save $tmp/cat_trade, replace


* -----------------------------------------------------------------------------
* Group countries by initial export (% of GDP)
* -----------------------------------------------------------------------------
* Load the export-share series and restrict it to the reference year.
import delimited $dta/export_share.csv, varnames(1) encoding(utf8) clear
rename (iso3c date value) (iso_alpha3 year export)
keep if year == ${first}
keep iso_alpha3 year export country

* Attach the headquarter code; print list countries with no export figure, then drop them.
merge 1:1 iso_alpha3 using $tmp/ctry_list, keep(match using)
tabulate _merge
list st3_name if _merge == 2
drop if _merge == 2

* Quartile of each country and the average export share within its quartile.
label variable export "Exports of goods and services (% of GDP) in $first"
xtile export_quart = export, nq(4)
egen mean_export = mean(export), by(export_quart)
label variable mean_export "Avg. export (% of GDP) in country's quartile in $first"
bysort export_quart: tabulate country

keep headq export mean_export export_quart*
order headq
save $tmp/cat_export, replace


* -----------------------------------------------------------------------------
* Group countries by R&D expenditures
* -----------------------------------------------------------------------------
import delimited $dta/RDexp.csv, varnames(1) encoding(utf8) clear
rename (iso3c date value) (iso_alpha3 year RDexp)
keep iso_alpha3 year RDexp country

* Unlike trade and exports, this section uses the earliest year found in the file
* rather than the reference year; that year is also written into the labels.
summarize year
local min = r(min)
keep if year == `min'

* Attach the headquarter code; print list countries with no R&D figure, then drop them.
merge 1:1 iso_alpha3 using $tmp/ctry_list, keep(match using)
tabulate _merge
list st3_name if _merge == 2
drop if _merge == 2

* Quartile of each country and the average R&D share within its quartile.
label variable RDexp "Research and development expenditure (% of GDP) in `min'"
xtile RD_quart = RDexp, nq(4)
egen mean_RDexp = mean(RDexp), by(RD_quart)
label variable mean_RDexp "Avg. R&D expenditure in country's quartile in `min'"
bysort RD_quart: tabulate country

keep headq RDexp mean_RDexp RD_quart*
order headq
save $tmp/cat_RDexp, replace


* -----------------------------------------------------------------------------
* Group countries by patent stock
* Uses all firms with patents, not just those with weights that end up in our
* final sample.
* NOTE(review): the original note said "by country and industry", but the steps
* visible in this section aggregate by headquarter country only; confirm intent.
* -----------------------------------------------------------------------------
// Identify granted patents: one row per application carrying the maximum of
// publn_first_grant across that application's publications.
use appln_id publn_first_grant using ../PATSTAT_spring2015/tls211/tls211, clear
collapse (max) publn_first_grant, by(appln_id)
save $tmp/granted, replace

// Keep relevant period (patent-holder file produced by firm_patents2.do)
use DATA/pat_hldr3, clear
keep if inrange(y, 1965, 1985)

// Keep relevant countries: only headquarters that appear in the country list
merge m:1 headq using $tmp/ctry_list
tabulate _merge
keep if _merge == 3
drop _merge

// Get citations per application
// First reduce to one row per holder x headquarter x authority x application.
bysort hrm headq appln_auth appln_id: generate unq = (_n == 1)
keep if unq == 1
// The citation file is keyed on cited_appln_id, so rename the key for the merge.
rename appln_id cited_appln_id
merge m:1 cited_appln_id using DATA/forward_citations, keep(match master)
// Applications absent from the citation file are given zero citations.
foreach cvar in citations citations3year {
    replace `cvar' = 0 if _merge == 1
}
rename cited_appln_id appln_id
drop _merge

// Get patent families
merge m:1 appln_id using ../PATSTAT_spring2015/tls218/tls218, keep(match master) nogenerate

// Identify granted patents
merge m:1 appln_id using $tmp/granted, keep(match master) nogenerate

// Count patents by country and year
// Step 1: one row per headquarter x patent family, with summed citations, the
// earliest year and a granted flag (maximum over the family's applications).
collapse (sum) citations* (min) y (max) granted=publn_first_grant, by(headq docdb)
// Step 2: one row per headquarter x year: family count (p), number of granted
// families, and mean citations per family.
collapse (count) p=docdb (sum) granted (mean) citations*, by(headq y)

// Cumulative sum
// tsset needs a numeric panel id, so encode headq and keep a headq <-> hq lookup
// to restore the string code on the rows that tsfill creates.
encode headq, gen(hq)
save $tmp/headqpatents, replace
contract headq hq
drop _freq
save $tmp/tmpheadq, replace

use $tmp/headqpatents, clear
tsset hq y
// Balance the panel; the added country-years carry zero patents and citations.
tsfill, full
foreach v of varlist p granted citations citations3year {
  replace `v'=0 if `v'==.
}

// From here on p is the count of GRANTED families; the raw family count is dropped.
drop p
rename granted p
// Running sums depend on the order of rows within each country, so sort by year
// explicitly instead of relying on the order left behind by tsfill.
bys hq (y): gen P = sum(p)
bys hq (y): gen C = sum(p*citations3year)
// Average 3-year citations per patent in the stock (missing while P is zero).
gen Cbar = C/P

// Restore the string headquarter code on every row, including filled-in ones.
drop headq
merge m:1 hq using $tmp/tmpheadq, assert(match)
drop _merge

// Keep patent stock up to 1985
keep P C Cbar headq y
order headq y
keep if y == 1985
save $tmp/cat_Pctry, replace

// Quartiles of Patent stock
// Express each country's 1985 stock as a share of the total over all countries in memory.
egen P85_world = total(P)
generate P85_ctry = P / P85_world

// Quartiles of the stock share and of average citations per patent.
xtile Pctry_quart = P85_ctry, nq(4)
xtile Cbar_ctry_quart = Cbar, nq(4)

// Bring in country names (keeping list countries that have no patent stock) and
// print the membership of each quartile.
merge 1:1 headq using $tmp/ctry_list, keep(match using)
foreach qvar of varlist Pctry_quart Cbar_ctry_quart {
    bysort `qvar': tabulate st3_name
}

// Drop intermediate variables and overwrite the file with the final version.
drop P85_world P C y _merge
save $tmp/cat_Pctry, replace


* -----------------------------------------------------------------------------
* Combine together
* -----------------------------------------------------------------------------
* Start from the country list and attach each country-level file in turn, always
* on the headquarter code. The merge marker is dropped after every step.
use $tmp/ctry_list, clear
foreach part in WB_income_group gdp_pcap_ppp gdp_pcap trade export RDexp Pctry {
    merge 1:1 headq using $tmp/cat_`part'
    drop _merge
}

save DATA/ctry_heterog, replace

* Clean up: delete the intermediate files that are no longer needed.
* The country list is kept on purpose because later sections still read it.
foreach scratch in cat_WB_income_group cat_gdp_pcap_ppp cat_gdp_pcap cat_trade cat_export cat_RDexp cat_Pctry tmp801 {
    erase $tmp/`scratch'.dta
}


* -----------------------------------------------------------------------------
* Firm level heterogeneity
* -----------------------------------------------------------------------------
// Firm size
// (ex-ante patent stock rel to average patent stock in country and industry)
global Granted "Granted"
// The folder name is spelled DATA, as everywhere else in this file, so the path
// also resolves on a case-sensitive file system.
// (An earlier version read reg_variables5 instead of reg_variables6.)
use DATA/reg_variables6${Granted}_WTO, clear
keep if wto1995 == 1
keep if y == 1992

// One row per distinct combination of firm, headquarter, industry and the three
// firm measures taken from the 1992 rows.
contract hrm_l2_id headq nace2_1 P85 Cbar85 ncty
gen C85 = Cbar85*P85

// Country-industry averages of each measure.
foreach var of varlist P85 Cbar85 ncty C85 {
    egen `var'_headq_ind = mean(`var'), by(headq nace2_1)
}

// Firm size relative to its country-industry average:
// size1 = P85, size2 = Cbar85, size3 = C85, size4 = ncty.
local k = 0
foreach var in P85 Cbar85 C85 ncty {
    local ++k
    gen size`k' = `var'/`var'_headq_ind
}

// Quartiles of firm size within country-industry
// NOTE: the xtile() function of egen is not part of official Stata; it is
// presumably supplied by the user-written egenmore package (confirm it is installed).
forvalues i = 1/4 {
    egen size`i'_quart = xtile(size`i'), n(4) by(headq nace2_1)
}

keep hrm_l2_id size*
// This file is later merged m:1 on hrm_l2_id, so fail here with a clear message
// if a firm appears more than once.
isid hrm_l2_id
save $tmp/firm_size, replace


* -----------------------------------------------------------------------------
* Compile heterogeneity dataset
* -----------------------------------------------------------------------------

* Granted selects which regression file is read; x and z name the two variables
* that are interacted with every heterogeneity measure further down.
global Granted "Granted"
*global Granted ""
global z "L8taut"
global x "L8tautApp"

// The folder name is spelled DATA, as everywhere else in this file, so the path
// also resolves on a case-sensitive file system.
// (An earlier version read reg_variables5 instead of reg_variables6.)
use DATA/reg_variables6${Granted}_WTO, clear
keep if wto1995 == 1

// Drop headq that are not countries
merge m:1 headq using $tmp/ctry_list, keep(match)
drop _merge

// Merge country heterogeneity measures
merge m:1 headq using DATA/ctry_heterog
drop _merge

// Merge firm heterogeneity measures
// NOTE(review): _merge from this step is left in the data and ends up in the
// saved file; kept as is in case downstream code expects it.
merge m:1 hrm_l2_id using $tmp/firm_size

// Country heterogeneity quartiles
// Interact x and z with an indicator for each World Bank income group.
// Each indicator is built from the group code itself, not from the position of
// a dummy produced by tabulate, so a suffix can never be attached to the wrong
// group if the set of codes in the data changes. Rows with no income group get
// missing interactions, exactly as tabulate-generated dummies would give.
tab income_group_WB
foreach cat in H L LM UM {
    gen ${x}_`cat' = $x * (income_group_WB == "`cat'") if !missing(income_group_WB)
    gen ${z}_`cat' = $z * (income_group_WB == "`cat'") if !missing(income_group_WB)
}

// Interact x and z with quartile dummies of every country-level measure.
// The two lists are parallel: the k-th quartile variable produces dummies named
// after the k-th stub (stub_q1 ... stub_q4).
local quartvars gdp_pcap_quart gdp_pcap_ppp_quart trade_quart export_quart RD_quart Pctry_quart Cbar_ctry_quart
local stubs     gdp_pcap       gdp_ppp            trade       export       RD       Pctry       Cbar_ctry

local nstubs : word count `stubs'
forvalues k = 1/`nstubs' {
    local qvar : word `k' of `quartvars'
    local stub : word `k' of `stubs'
    tabulate `qvar', generate(`stub'_q)
}

foreach var of varlist $x $z {
    forvalues i = 1/4 {
        foreach stub of local stubs {
            generate `var'_`stub'_q`i' = `var' * `stub'_q`i'
        }
    }
}

// The plain dummies were only needed to build the interactions.
foreach stub of local stubs {
    drop `stub'_q?
}

// Firm level heterogeneity measures and quartiles
// Continuous interactions: x and z times each relative firm-size measure.
foreach var of varlist $x $z {
    forvalues i = 1/4 {
        gen `var'_size`i' = `var' * size`i'
    }
}

// Quartile dummies of each size measure (these stay in the saved file).
forvalues i = 1/4 {
    tab size`i'_quart, gen(size`i'_q)
}

// Quartile interactions: x and z times each quartile dummy of each size measure.
foreach var of varlist $x $z {
    forvalues i = 1/4 {
        forvalues j = 1/4 {
            gen `var'_size`i'_q`j' = `var' * size`i'_q`j'
        }
    }
}

// The folder name is spelled DATA, as for the other outputs of this file, so the
// save also works on a case-sensitive file system.
save DATA/heterogeneity_variables, replace
